{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import division, print_function\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# DNA microarray processing\n",
    "\n",
    "### Data in this example\n",
    "\n",
    "*Yeast microarrays for genome wide parallel genetic and gene\n",
    "expression analysis*\n",
    "\n",
    "<img src=\"../images/microarray.jpg\" width=\"200px\" style=\"float: left; padding-right: 1em;\"/>\n",
    "\n",
    "Two-color fluorescent scan of a yeast microarray containing 2,479 elements\n",
    "(ORFs). The center-to-center distance between elements is 345 μm. A probe\n",
    "mixture consisting of cDNA from yeast extract/peptone (YEP) galactose (green\n",
    "pseudocolor) and YEP glucose (red pseudocolor) grown yeast cultures was\n",
    "hybridized to the array. Intensity per element corresponds to ORF expression,\n",
    "and pseudocolor per element corresponds to relative ORF expression between the\n",
    "two cultures. \n",
    "\n",
    "by Deval A. Lashkari, http://www.pnas.org/content/94/24/13057/F1.expansion\n",
    "\n",
    "<div style=\"clear: both;\"></div>\n",
    "<br/>\n",
    "Learn more about microarrays:\n",
    "\n",
    "- [Tutorial on how to analyze microarray data](http://www.hhmi.org/biointeractive/how-analyze-dna-microarray-data)\n",
    "- [Complementary DNA](http://en.wikipedia.org/wiki/Complementary_DNA)\n",
    "\n",
    "More example data:\n",
    "\n",
    "- [MicroArray Genome Imaging & Clustering Tool](http://www.bio.davidson.edu/projects/MAGIC/MAGIC.html) by Laurie Heyer & team, Davidson College\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "from skimage import io, img_as_float"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "microarray = io.imread('../images/microarray.jpg')\n",
    "\n",
    "# Scale between zero and one\n",
    "microarray = img_as_float(microarray)\n",
    "\n",
    "plt.figure(figsize=(10, 5))\n",
    "plt.imshow(microarray[:500, :1000], cmap='gray', interpolation='nearest');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from skimage import color\n",
    "f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))\n",
    "\n",
    "red = microarray[..., 0]\n",
    "green = microarray[..., 1]\n",
    "\n",
    "red_rgb = np.zeros_like(microarray)\n",
    "red_rgb[..., 0] = red\n",
    "\n",
    "green_rgb = np.zeros_like(microarray)\n",
    "green_rgb[..., 1] = green\n",
    "\n",
    "ax0.imshow(green_rgb, interpolation='nearest')\n",
    "ax1.imshow(red_rgb, interpolation='nearest')\n",
    "plt.suptitle('\\n\\nPseudocolor plots of red and green channels', fontsize=16);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from skimage import filter as filters\n",
    "\n",
    "mask = (green > 0.1)\n",
    "plt.imshow(mask[:1000, :1000], cmap='gray');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "z = red.copy()\n",
    "z /= green\n",
    "z[~mask] = 0\n",
    "\n",
    "print(z.min(), z.max())\n",
    "\n",
    "plt.imshow(z[:500, :500], cmap=plt.cm.gray, vmin=0, vmax=2);"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Locating the grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "both = (green + red)\n",
    "\n",
    "plt.imshow(both, cmap='gray');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from skimage import feature\n",
    "\n",
    "sum_down_columns = both.sum(axis=0)\n",
    "sum_across_rows = both.sum(axis=1)\n",
    "\n",
    "dips_columns = feature.peak_local_max(sum_down_columns.max() - sum_down_columns)\n",
    "dips_columns = dips_columns.ravel()\n",
    "\n",
    "M = len(dips_columns)\n",
    "column_distance = np.mean(np.diff(dips_columns))\n",
    "\n",
    "dips_rows = feature.peak_local_max(sum_across_rows.max() - sum_across_rows)\n",
    "dips_rows = dips_rows.ravel()\n",
    "\n",
    "N = len(dips_rows)\n",
    "row_distance = np.mean(np.diff(dips_rows))\n",
    "\n",
    "print('Columns are a mean distance of %.2f apart' % column_distance)\n",
    "print('Rows are a mean distance of %.2f apart' % row_distance)\n",
    "\n",
    "f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 5))\n",
    "\n",
    "ax0.plot(sum_down_columns)\n",
    "ax0.scatter(dips_columns, sum_down_columns[dips_columns])\n",
    "ax0.set_xlim(0, 200)\n",
    "ax0.set_title('Column gaps')\n",
    "\n",
    "ax1.plot(sum_across_rows)\n",
    "ax1.scatter(dips_rows, sum_across_rows[dips_rows])\n",
    "ax1.set_xlim(0, 200)\n",
    "ax0.set_title('Row gaps');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "P, Q = 500, 500\n",
    "\n",
    "plt.figure(figsize=(15, 10))\n",
    "plt.imshow(microarray[:P, :Q])\n",
    "\n",
    "for i in dips_rows[dips_rows < P]:\n",
    "    plt.plot([0, Q], [i, i], 'm')\n",
    "\n",
    "for j in dips_columns[dips_columns < Q]:\n",
    "    plt.plot([j, j], [0, P], 'm')\n",
    "\n",
    "plt.axis('image');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "out = np.zeros(microarray.shape[:2])\n",
    "\n",
    "for i in range(M - 1):\n",
    "    for j in range(N - 1):\n",
    "        row0, row1 = dips_rows[i], dips_rows[i + 1]\n",
    "        col0, col1 = dips_columns[j], dips_columns[j + 1]\n",
    "        \n",
    "        r = microarray[row0:row1, col0:col1, 0]\n",
    "        g = microarray[row0:row1, col0:col1, 1]\n",
    "        \n",
    "        ratio = r / g\n",
    "        mask = ~np.isinf(ratio)\n",
    "\n",
    "        mean_ratio = np.mean(ratio[mask])\n",
    "        if np.isnan(mean_ratio):\n",
    "            mean_ratio = 0\n",
    "        \n",
    "        out[row0:row1, col0:col1] = mean_ratio"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))\n",
    "\n",
    "ax0.imshow(microarray)\n",
    "ax0.grid(color='magenta', linewidth=1)\n",
    "\n",
    "ax1.imshow(out, cmap='gray', interpolation='nearest', vmin=0, vmax=3);\n",
    "ax1.grid(color='magenta', linewidth=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Transform the intensity to spot outliers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "f, (ax0, ax1) = plt.subplots(1, 2, figsize=(15, 10))\n",
    "\n",
    "ax0.imshow(microarray)\n",
    "ax0.grid(color='magenta', linewidth=1)\n",
    "\n",
    "ax1.imshow(np.log(0.5 + out), cmap='gray', interpolation='nearest', vmin=0, vmax=3);\n",
    "ax1.grid(color='magenta', linewidth=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "---\n",
    "\n",
    "<div style=\"height: 400px;\"></div>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>\n",
       ".rendered_html {\n",
       "    font-family: Georgia, serif;\n",
       "    font-size: 130%;\n",
       "    line-height: 1.5;\n",
       "}\n",
       "\n",
       ".input {\n",
       "    width: 930px;\n",
       "}\n",
       "\n",
       ".inner_cell {\n",
       "    width: 800px;\n",
       "}\n",
       "\n",
       ".code_cell {\n",
       "    width: 800px;\n",
       "}\n",
       "\n",
       ".CodeMirror-sizer {\n",
       "}\n",
       "\n",
       "hr {\n",
       "    border: 1px solid #DDD;\n",
       "}\n",
       "\n",
       ".rendered_html h1 {\n",
       "    margin: 0.25em 0em 0.5em;\n",
       "    font-family: sans-serif;\n",
       "    color: #015C9C;\n",
       "    text-align: center;\n",
       "    line-height: 1.2;\n",
       "    page-break-before: always;\n",
       "}\n",
       "\n",
       ".rendered_html h2 {\n",
       "    margin: 1.1em 0em 0.5em;\n",
       "    font-family: sans-serif;\n",
       "    color: #26465D;\n",
       "    line-height: 1.2;\n",
       "}\n",
       "\n",
       ".rendered_html h3 {\n",
       "    font-family: sans-serif;\n",
       "    margin: 1.1em 0em 0.5em;\n",
       "    color: #002845;\n",
       "    line-height: 1.2;\n",
       "}\n",
       "\n",
       ".rendered_html li {\n",
       "    line-height: 1.5;\n",
       "}\n",
       "\n",
       ".CodeMirror-lines {\n",
       "    font-size: 110%;\n",
       "    line-height: 1.4em;\n",
       "    font-family: DejaVu Sans Mono, Consolas, Ubuntu, monospace;\n",
       "}\n",
       "\n",
       "h1.bigtitle {\n",
       "    margin: 4cm 1cm 4cm 1cm;\n",
       "    font-size: 300%;\n",
       "}\n",
       "\n",
       "h3.point {\n",
       "    font-size: 200%;\n",
       "    text-align: center;\n",
       "    margin: 2em 0em 2em 0em;\n",
       "    #26465D\n",
       "}\n",
       "\n",
       ".logo {\n",
       "    margin: 20px 0 20px 0;\n",
       "}\n",
       "\n",
       "a.anchor-link {\n",
       "    display: none;\n",
       "}\n",
       "\n",
       "h1.title {\n",
       "    font-size: 250%;\n",
       "}\n",
       "\n",
       ".exercize {\n",
       "    color: #738;\n",
       "}\n",
       "\n",
       "h2 .exercize {\n",
       "    font-style: italic;\n",
       "}\n",
       "\n",
       "</style>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML at 0x7f5f3e085b70>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "%reload_ext load_style\n",
    "%load_style ../themes/tutorial.css"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
